!pip install global_land_mask --user
import ipywidgets as widgets
from ipywidgets import interact, interact_manual
# Importing libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from plotly import tools
# import plotly.plotly as py
import chart_studio.plotly
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.figure_factory as ff
from IPython.display import HTML, Image
import plotly.express as px
import numpy
from global_land_mask import globe
from math import *
# Read the Mapbox access token from the local ".mapbox_token" file. The
# context manager closes the file handle as soon as the token is read, and
# strip() removes any surrounding whitespace (such as a trailing newline) so
# it does not become part of the token.
with open(".mapbox_token") as token_file:
    px.set_mapbox_access_token(token_file.read().strip())

# Tracking table used by every cell below; the analysis reads its bird_name,
# date_time, altitude, speed_2d, latitude and longitude columns.
birds = pd.read_csv("bird_tracking.csv")
Are there differences in the altitudes of the birds? Are there further differences when factoring in time of day?
First, we establish "night" as 6 pm through 5:59 am (clock hours 18–23 and 0–5), with day being everything else -- we make a column in the data frame to specify the time of day as "Night" or "Day", and additionally make separate data frames for day and night values.
Next, we create a new column for altitude, compressing our altitude values down to a more intuitive scale by taking the base-2 logarithm. All altitudes equal to or less than zero are simply set to 1.
# Label every observation as "Night" (clock hour 18-23 or 0-5) or "Day".
# The timestamp column is parsed once as a whole rather than twice per row
# inside DataFrame.apply, which is much faster on a large table and yields
# the same labels.
hour_of_day = pd.to_datetime(birds["date_time"]).dt.hour
is_night = (hour_of_day >= 18) | (hour_of_day <= 5)
birds["Time of Day"] = np.where(is_night, "Night", "Day")

# Separate frames for the night-time and daytime observations.
birds_night = birds[birds["Time of Day"] == "Night"]
birds_day = birds[birds["Time of Day"] == "Day"]
def convert_log(value):
    """Return the base-2 logarithm of ``value`` as computed by ``numpy.log2``."""
    log_value = numpy.log2(value)
    return log_value
# Log-compressed altitude for plotting: rows whose altitude is not strictly
# positive get the fixed value 1, all other rows get convert_log(altitude).
birds["altitude size marker"] = birds.apply(
    lambda row: 1 if not row["altitude"] > 0 else convert_log(row["altitude"]),
    axis=1,
)
For analysis, we determine the average altitude for each bird in general, during the day, and during the night.
# Mean altitude per bird: overall, daytime only, night-time only.
# Series.reset_index() already returns a two-column DataFrame
# (bird_name, altitude), so no intermediate to_frame() is needed.
birds.groupby("bird_name")["altitude"].mean().reset_index()
birds_day.groupby("bird_name")["altitude"].mean().reset_index()
birds_night.groupby("bird_name")["altitude"].mean().reset_index()
Clearly, Nico likes to fly the highest, with Eric not too far behind. Sanne, compared to the other birds, likes to fly at much lower altitudes.
When factoring in day or night, it is clear that Nico and Eric fly lower during the night. Sanne, on the other hand, appears to not have a preference for altitude depending on time of day.
We can illustrate these themes in a boxplot.
# Box plot of the log-compressed altitude for each bird, with one box per
# "Time of Day" value.
fig_altitude_time = px.box(
    birds,
    x="bird_name",
    y="altitude size marker",
    color="Time of Day",
)
fig_altitude_time.show()
Now, let's do the same thing for speed.
# Mean speed_2d per bird: overall, daytime only, night-time only.
# Series.reset_index() already returns a two-column DataFrame
# (bird_name, speed_2d), so no intermediate to_frame() is needed.
birds.groupby("bird_name")["speed_2d"].mean().reset_index()
birds_day.groupby("bird_name")["speed_2d"].mean().reset_index()
birds_night.groupby("bird_name")["speed_2d"].mean().reset_index()
Nico appears to travel the fastest out of any of the birds, with Sanne next and Eric being the slowest. All the birds travel faster during the day than during the night, but again, Sanne appears to have less of a preference depending on the time of day than the other two birds.
Again, we can visualize this in a boxplot. Since there are many outliers for speed, we limit the y-axis to the range 0 to 10.
# Box plot of speed_2d for each bird, with one box per "Time of Day" value.
fig_speed_time = px.box(
    birds,
    x="bird_name",
    y="speed_2d",
    color="Time of Day",
)
# Clip the visible y-axis to [0, 10] so outliers do not flatten the boxes.
fig_speed_time.update_layout(yaxis={"range": [0, 10]})
fig_speed_time.show()
What about the maximums?
# Overall maxima per bird. Both tables are computed from the full data set
# so that the maximum speed and the maximum altitude are directly comparable;
# taking the altitude maximum from the daytime subset alone would miss any
# higher night-time reading.
birds.groupby("bird_name")["speed_2d"].max().to_frame().reset_index()
birds.groupby("bird_name")["altitude"].max().to_frame().reset_index()
Interestingly, there seems to be an inverse relationship with the maximums of the birds' speed and altitude. Eric has the fastest recorded speed, but the lowest maximum altitude. Conversely, Nico has the highest recorded altitude, but the lowest maximum speed. Lastly, Sanne places in the middle with regards to both.
Furthermore, despite being the slowest bird on average, Eric has the fastest maximum speed. Also, despite being the lowest flying bird on average, Sanne does not have the lowest maximum altitude.
Does Terrain Type affect speed and altitude for each bird?
Here, we add a new column to the data frame based on the function below, which takes input coordinates and returns if the location is land. Thus, we can make a new column telling us whether each point is over land or over water, and make separate data frames based off this condition as well.
def over_water(lat, long):
    """Label a coordinate as "Over Land" or "Over Water".

    The decision is delegated to ``globe.is_land(lat, long)``: a truthy
    result yields "Over Land", anything else "Over Water".
    """
    return "Over Land" if globe.is_land(lat, long) else "Over Water"
# Tag every observation with the terrain label returned by over_water().
birds["Terrain"] = birds.apply(
    lambda row: over_water(row["latitude"], row["longitude"]),
    axis=1,
)

# Separate frames for the observations over land and over water.
on_land = birds["Terrain"] == "Over Land"
on_water = birds["Terrain"] == "Over Water"
birds_land = birds[on_land]
birds_water = birds[on_water]
# Mean speed_2d per bird over land and over water.
birds_land.groupby("bird_name")["speed_2d"].mean().reset_index()
birds_water.groupby("bird_name")["speed_2d"].mean().reset_index()

# Box plot of speed_2d for each bird, with one box per "Terrain" value.
fig_speed_terrain = px.box(
    birds,
    x="bird_name",
    y="speed_2d",
    color="Terrain",
)
# Clip the visible y-axis to [0, 10] so outliers do not flatten the boxes.
fig_speed_terrain.update_layout(yaxis={"range": [0, 10]})
fig_speed_terrain.show()
Over land, Sanne is by far the slowest average bird, which is somewhat surprising because Sanne was the middle bird with overall speed. Eric is the only bird that travels faster over land than over water, as the other birds travel much faster over water. Nico places as the fastest bird in each category, which makes sense considering Nico is the fastest bird overall on average.
Now let's see altitude.
# Mean altitude per bird over land and over water.
birds_land.groupby("bird_name")["altitude"].mean().reset_index()
birds_water.groupby("bird_name")["altitude"].mean().reset_index()

# Box plot of the log-compressed altitude for each bird, with one box per
# "Terrain" value.
fig_altitude_terrain = px.box(
    birds,
    x="bird_name",
    y="altitude size marker",
    color="Terrain",
)
fig_altitude_terrain.show()
Nico, being the bird that flies highest on average, unsurprisingly flies the highest on average while over land. However, Nico flies over water at the lowest altitude on average. Again, we can see that out of all the birds, Sanne has the least variation of altitude. There is not a huge difference between Sanne's average altitudes over land or over water. But, it is clear that all the birds do fly lower over water to some extent.
So, to recap:
Nico:
-On average, travels at the highest altitude and highest speed of all 3 birds -Has the highest maximum altitude, but surprisingly has the lowest maximum speed -Highest average altitude over land, but the lowest average altitude over water -Has the highest average speed over land and over water
Eric:
-Places in the middle in average altitude, both during the day and night -Slowest bird on average (regardless of time of day), yet has the fastest maximum speed -Lowest maximum altitude by a wide margin, despite average altitude placing in the middle -Only bird to travel faster over land than over water -Places in the middle for average altitude over land, but has the highest average altitude over water
Sanne:
-Lowest average altitude regardless of time of day by a huge margin, yet has 2nd highest maximum altitude -Places in the middle for speed regardless of time of day, and places in the middle for maximum speed as well -By far the slowest bird on average over land, but essentially tied for the fastest bird on average over water -By far the lowest flying bird on average over land, but is not the lowest flying bird on average over water -Least amount of variance in altitude regardless of condition by a considerable amount
Factoring in Distances
To examine any possible differences in the distances covered by the birds, we can make a data frame comparing distances and time of day.
To do this, we can build functions to calculate distances between points, and furthermore calculate the absolute value of the distance covered by each bird.
def distance(lon1, lat1, lon2, lat2, radius_km=6371):
    """Return the great-circle distance between two points (haversine formula).

    Parameters:
        lon1, lat1: longitude and latitude of the first point, in degrees.
        lon2, lat2: longitude and latitude of the second point, in degrees.
        radius_km: radius of the sphere; the default 6371 is the Earth's
            mean radius in kilometres.

    Returns:
        The distance along the sphere's surface, in the unit of
        ``radius_km``. NaN coordinates produce a NaN result.
    """
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally above 1 for (nearly)
    # antipodal points, which would make asin() raise a domain error. The
    # explicit comparison leaves NaN untouched (NaN > 1.0 is False).
    if a > 1.0:
        a = 1.0
    c = 2 * asin(sqrt(a))
    return c * radius_km
def get_dist_bird(bird, df):
    """Return the total path length covered by ``bird`` in ``df``, rounded to 2 decimals.

    The rows of ``df`` whose ``bird_name`` equals ``bird`` are taken in their
    existing order, and ``distance`` between each consecutive pair of
    (longitude, latitude) positions is summed.

    NOTE(review): rows are not sorted here, so this presumably relies on
    ``df`` already being in chronological order for each bird. When ``df`` is
    a filtered subset, the jump between the rows on either side of a removed
    stretch is included in the total as well.

    Parameters:
        bird: value to match against the ``bird_name`` column.
        df: DataFrame with ``bird_name``, ``longitude`` and ``latitude`` columns.
    """
    track = df[df["bird_name"] == bird]
    lons = track["longitude"].tolist()
    lats = track["latitude"].tolist()
    # Pair every position with its successor; fewer than two rows gives no steps.
    steps = [
        distance(lon1, lat1, lon2, lat2)
        for lon1, lat1, lon2, lat2 in zip(lons, lats, lons[1:], lats[1:])
    ]
    # Summing through a float Series keeps pandas' NaN-skipping sum and avoids
    # writing float distances into an integer-typed column one cell at a time.
    return round(pd.Series(steps, dtype=float).sum(), 2)
# Distance covered by each bird during the day and during the night, plus
# each period's share of that bird's combined distance.
bird_names = ["Eric", "Nico", "Sanne"]
names = {'Bird Name': bird_names}
bird_distance_time = pd.DataFrame(data=names)

# Whole columns are assigned at once. Writing single cells through
# df[col].iloc[i] = ... is chained assignment: pandas warns about it and,
# with copy-on-write enabled, the write never reaches the DataFrame.
bird_distance_time["Distance at Day"] = [get_dist_bird(bird, birds_day) for bird in bird_names]
bird_distance_time["Distance at Night"] = [get_dist_bird(bird, birds_night) for bird in bird_names]

# NOTE: despite the "%" in the column names, these are fractions between 0
# and 1, rounded to two decimals.
total_distance = bird_distance_time["Distance at Day"] + bird_distance_time["Distance at Night"]
bird_distance_time["% Distance at Night"] = (bird_distance_time["Distance at Night"] / total_distance).round(2)
bird_distance_time["% Distance at Day"] = (bird_distance_time["Distance at Day"] / total_distance).round(2)
bird_distance_time
# Choices offered by the interactive map widgets.
name_list = ['Eric', 'Nico', 'Sanne', 'all']


@interact(name=name_list)
def map1(name):
    """Show a map of daytime positions for one bird, or for every bird.

    Parameters:
        name: a bird name, or 'all' to plot every bird.
    """
    # 'all' is still limited to the daytime subset so that this map shows
    # daytime positions only, whichever option is selected.
    if name == 'all':
        df1 = birds_day
    else:
        df1 = birds_day[birds_day['bird_name'] == name]
    map_distance_day = px.scatter_mapbox(df1, lat="latitude", lon="longitude", color="bird_name",
                                         color_continuous_scale=px.colors.cyclical.IceFire, size_max=7, zoom=2)
    map_distance_day.show()
@interact(name=name_list)
def map2(name):
    """Show a map of night-time positions for one bird, or for every bird.

    Parameters:
        name: a bird name, or 'all' to plot every bird.
    """
    # 'all' is still limited to the night-time subset so that this map shows
    # night-time positions only, whichever option is selected.
    if name == 'all':
        df1 = birds_night
    else:
        df1 = birds_night[birds_night['bird_name'] == name]
    map_distance_night = px.scatter_mapbox(df1, lat="latitude", lon="longitude", color="bird_name",
                                           color_continuous_scale=px.colors.cyclical.IceFire, size_max=7, zoom=2)
    map_distance_night.show()
As shown, Sanne is the only bird that does not have a higher percentage of distance covered during the day than during the night. Also, Eric has the least amount of distance covered at day and at night by far.
Now let's do the same process but for distance by terrain type.
# Distance covered by each bird over land and over water, plus each terrain
# type's share of that bird's combined distance.
bird_names = ["Eric", "Nico", "Sanne"]
names = {'Bird Name': bird_names}
bird_distance_terrain = pd.DataFrame(data=names)

# Whole columns are assigned at once. Writing single cells through
# df[col].iloc[i] = ... is chained assignment: pandas warns about it and,
# with copy-on-write enabled, the write never reaches the DataFrame.
bird_distance_terrain["Distance Over Land"] = [get_dist_bird(bird, birds_land) for bird in bird_names]
bird_distance_terrain["Distance Over Water"] = [get_dist_bird(bird, birds_water) for bird in bird_names]

# NOTE: despite the "%" in the column names, these are fractions between 0
# and 1, rounded to two decimals.
total_distance = bird_distance_terrain["Distance Over Land"] + bird_distance_terrain["Distance Over Water"]
bird_distance_terrain["% Distance Over Land"] = (bird_distance_terrain["Distance Over Land"] / total_distance).round(2)
bird_distance_terrain["% Distance Over Water"] = (bird_distance_terrain["Distance Over Water"] / total_distance).round(2)
bird_distance_terrain
@interact(name=name_list)
def map3(name):
    """Show a map of over-land positions for one bird, or for every bird.

    Parameters:
        name: a bird name, or 'all' to plot every bird.
    """
    # 'all' is still limited to the over-land subset so that this map shows
    # over-land positions only, whichever option is selected.
    if name == 'all':
        df1 = birds_land
    else:
        df1 = birds_land[birds_land['bird_name'] == name]
    map_distance_land = px.scatter_mapbox(df1, lat="latitude", lon="longitude", color="bird_name",
                                          color_continuous_scale=px.colors.cyclical.IceFire, size_max=7, zoom=2)
    map_distance_land.show()
@interact(name=name_list)
def map4(name):
    """Show a map of over-water positions for one bird, or for every bird.

    Parameters:
        name: a bird name, or 'all' to plot every bird.
    """
    # 'all' is still limited to the over-water subset so that this map shows
    # over-water positions only, whichever option is selected.
    if name == 'all':
        df1 = birds_water
    else:
        df1 = birds_water[birds_water['bird_name'] == name]
    map_distance_water = px.scatter_mapbox(df1, lat="latitude", lon="longitude", color="bird_name",
                                           color_continuous_scale=px.colors.cyclical.IceFire, size_max=7, zoom=2)
    map_distance_water.show()
As shown, Eric is the only bird to cover more distance over land than over water. However, he ironically has the lowest total distance covered over land. Nico and Sanne both have similar amounts of distance covered over land and over water, as well as percentages for these conditions.